# ================================================================================================= #
# Replication code for: Lying for Trump? Elite Cue-Taking and Expressive Responding on Vote Method  #
# Authors: Enrijeta Shino, Daniel A. Smith, and Laura Uribe                                         # 
# Journal: Public Opinion Quarterly                                                                 #
# Year: 2022                                                                                        #
# ================================================================================================= #

# Clear environment 
rm(list = ls())

# Libraries
library(data.table)
library(survey)
library(questionr)
library(weights)
library(SortedEffects)
library(sandwich)
library(lmtest)
library(ggplot2)
library(ggeffects)
library(ggpubr)
library(texreg)

# Dataset
load("/Users/Enrijeta/Dropbox/My Mac (MacBook-Pro.local)/Desktop/POQ_FinalSubmission/replication_dataset/flsurvey_dta.Rdata")

#===================#
# Variable recoding #
#===================#

# Vote intent November 2020: 0 = Biden; 1 = Trump
flsurvey_dta$vote20_trump <- with(flsurvey_dta, ifelse(vote_intent == 2, 0,
                                                ifelse(vote_intent == 1, 1, NA)))
table(flsurvey_dta$vote20_trump)


# Follow news about politics: 1 = not at all ---> 5 = a great deal
flsurvey_dta$political_awareness <- with(flsurvey_dta, ifelse(NEWS == "None at all", 1, 
                                                       ifelse(NEWS == "A little", 2, 
                                                       ifelse(NEWS == "A moderate amount", 3, 
                                                       ifelse(NEWS == "A lot", 4, 
                                                       ifelse(NEWS == "A great deal", 5, NA))))))
table(flsurvey_dta$political_awareness)


# 2016 General Election vote: 
# 1 = VBM; 2 = ED, 3 = EIP. Drop missing values
flsurvey_dta$gen16 <- with(flsurvey_dta, ifelse(gen16.x == "A" | gen16.x == "B", 1, 
                                         ifelse(gen16.x == "Y" | gen16.x == "P", 2, 
                                         ifelse(gen16.x == "E", 3, NA))))
table(flsurvey_dta$gen16)


# 2018 General Election vote: 
# 1 = VBM; 2 = ED, 3 = EIP. Drop non-voters
flsurvey_dta$gen18 <- with(flsurvey_dta, ifelse(gen18.x == "A" | gen18.x == "B", 1, 
                                         ifelse(gen18.x == "Y" | gen18.x == "P", 2, 
                                         ifelse(gen18.x == "E", 3, NA))))
table(flsurvey_dta$gen18)


# 2020 General Election vote: 
# 1 = VBM; 2 = ED, 3 = EIP. drop non-voters
flsurvey_dta$gen20 <- with(flsurvey_dta, ifelse(VoteHistoryCode == "A" | VoteHistoryCode == "B", 1, 
                                         ifelse(VoteHistoryCode == "Y", 2, 
                                         ifelse(VoteHistoryCode == "E", 3, NA))))
table(flsurvey_dta$gen20)


# 2016 Primary Vote: 
# 1 = VBM; 2 = ED, 3 = EIP
flsurvey_dta$pri16 <- with(flsurvey_dta, ifelse(pri16.x == "A" | pri16.x == "B", 1, 
                                         ifelse(pri16.x == "Y" | pri16.x == "P", 2, 
                                         ifelse(pri16.x == "E", 3, NA))))
table(flsurvey_dta$pri16)


# 2018 Primary Vote: 
# 1 = VBM; 2 = ED, 3 = EIP
flsurvey_dta$pri18 <- with(flsurvey_dta, ifelse(pri18.x == "A" | pri18.x == "B", 1, 
                                         ifelse(pri18.x == "Y" | pri18.x == "P", 2, 
                                         ifelse(pri18.x == "E", 3, NA))))
table(flsurvey_dta$pri18)


# Voted by mail in November 2020: 1 = yes, 0 = no
flsurvey_dta$vote_vbm20 <- ifelse(flsurvey_dta$gen20 == 1, 1, 0)
table(flsurvey_dta$vote_vbm20)


# Vote method November 2020: 0 = unsure; 1 = VBM; 2 = ED, 3 = EIP
flsurvey_dta$vote20_method <- with(flsurvey_dta, ifelse(VOTE20_METH == "Unsure", 0, 
                                                 ifelse(VOTE20_METH == "By mail", 1,
                                                 ifelse(VOTE20_METH == "In person on Election Day", 2,
                                                 ifelse(VOTE20_METH == "In person at an early voting location", 3, NA)))))
table(flsurvey_dta$vote20_method)


# Vote method: 0 = it varies; 1 = VBM; 2 = ED, 3 = EIP
flsurvey_dta$vote_method <- with(flsurvey_dta, ifelse(VOTE_METH == "It varies", 0, 
                                               ifelse(VOTE_METH == "By mail", 1,
                                               ifelse(VOTE_METH == "In person on Election Day", 2,
                                               ifelse(VOTE_METH == "In person at an early voting location", 3, NA)))))
table(flsurvey_dta$vote_method)


# VBM 2020 vote validated, using self-reported vote method and the voter file item: 1 = VBM misreport, 0 = other 
flsurvey_dta$votevbm20_misreport <- ifelse(flsurvey_dta$vote20_method != 1 & flsurvey_dta$vote_vbm20 == 1, 1, 0)
table(flsurvey_dta$votevbm20_misreport)


# Habitual VBM voter dummy: 1 = voted by mail in both Gen2016 and Gen2018; 0 = otherwise
flsurvey_dta$habitual_vbm_gen1618 <- with(flsurvey_dta, ifelse(gen18 == 1 & gen16 == 1, 1, 0))
table(flsurvey_dta$habitual_vbm_gen1618)


# Habitual VBM voter dummy: 1 = voted by mail in all Gen and Pri 2016 and 2018 election; 0 = otherwise
flsurvey_dta$habitual_vbm_genpri1618 <- with(flsurvey_dta, ifelse(gen18 == 1 & gen16 == 1 & pri16 == 1 & pri18 == 1, 1, 0))
table(flsurvey_dta$habitual_vbm_genpri1618)


# Habitual in-person voter dummy: 1 = voted in person in both Gen2016 and Gen2018; 0 = otherwise
flsurvey_dta$habitual_inperson_gen1618 <- with(flsurvey_dta, ifelse((gen18 == 2 | gen18 == 3) & (gen16 == 2 | gen16 == 3) ,1, 0))
table(flsurvey_dta$habitual_inperson_gen1618)


# Habitual in-person voter: 1 = voted in person in all Gen and Pri 2016 and 2018 elections; 0 = otherwise
flsurvey_dta$habitual_inperson_genpri1618 <- with(flsurvey_dta, ifelse((gen18 == 2 | gen18 == 3) & (gen16 == 2 | gen16 == 3) &
                                                                       (pri18 == 2 | pri18 == 3) & (pri16 == 2 | pri16 == 3),1, 0))
table(flsurvey_dta$habitual_inperson_genpri1618)


# Past vbm misreport if voted by mail in both Gen16 and Gen18: 1 = misreport; 0 = otherwise
flsurvey_dta$past_vbmgen1618_miresport <- with(flsurvey_dta, ifelse(habitual_vbm_gen1618 == 1 & vote_method != 1, 1, 0))
table(flsurvey_dta$past_vbmgen1618_miresport)


# Past vbm misreport if voted by mail in both Gen16 and Gen18: 1 = misreport; 0 = otherwise
flsurvey_dta$past_vbmgenpri1618_miresport <- with(flsurvey_dta, ifelse(habitual_vbm_genpri1618 == 1 & vote_method != 1, 1, 0))
table(flsurvey_dta$past_vbmgenpri1618_miresport)


# In-person ballots cast: dummies for in-person ballots for each election
flsurvey_dta$inperson_gen16 <- ifelse(flsurvey_dta$gen16 == 2 | flsurvey_dta$gen16 == 3, 1, 0)
flsurvey_dta$inperson_gen18 <- ifelse(flsurvey_dta$gen18 == 2 | flsurvey_dta$gen18 == 3, 1, 0)
flsurvey_dta$inperson_pri16 <- ifelse(flsurvey_dta$pri16 == 2 | flsurvey_dta$pri16 == 3, 1, 0)
flsurvey_dta$inperson_pri18 <- ifelse(flsurvey_dta$pri18 == 2 | flsurvey_dta$pri18 == 3, 1, 0)


# Validated Gen16 and Gen18: Usually have not voted in-person in the past: 1 = no in-person voting; 0 = usually vote in-person. 
flsurvey_dta$past_inpersonmisreportgen1618 <- with(flsurvey_dta, ifelse((vote_method != 2 | vote_method != 3) & 
                                                                        (inperson_gen16 == 1 & inperson_gen18 == 1), 1, 0))
table(flsurvey_dta$past_inpersonmisreportgen1618)


# Validated Gen16 and Gen18: Usually have not voted in-person in the past: 1 = no in-person voting; 0 = usually vote in-person. 
flsurvey_dta$past_inpersonmisreportgenpri1618 <- with(flsurvey_dta, ifelse((vote_method != 2 | vote_method != 3) & 
                                                                           (inperson_gen16 == 1 & inperson_gen18 == 1 &
                                                                            inperson_pri16 == 1 & inperson_pri18 == 1), 1, 0))
table(flsurvey_dta$past_inpersonmisreportgenpri1618)


# Validated: November 2020 vote method for in-person voting: 1 = misreport, saying will vote in person but voted by mail; 0 = otherwise
flsurvey_dta$inpersonmisreport_nov20 <- with(flsurvey_dta, 
                                             ifelse((vote20_method == 2 | vote20_method == 3) & gen20 == 1, 1, 0))
table(flsurvey_dta$inpersonmisreport_nov20)


# Gender voter file: 1 = female, 0 = male
flsurvey_dta$female <- with(flsurvey_dta, ifelse(Gender == "M", 0, 
                                          ifelse(Gender == "F", 1, NA)))

table(flsurvey_dta$female)


# Age: 1 = 18-24; 2 = 25-34; 3 = 35-44; 4 = 45-54; 5 = 55-64; 6 = 65+
flsurvey_dta$age <-  as.numeric(flsurvey_dta$AGE)
flsurvey_dta$age <- with(flsurvey_dta, ifelse(age == 4, 1,
                                       ifelse(age == 5, 2,
                                       ifelse(age == 6, 3, 
                                       ifelse(age == 7, 4,
                                       ifelse(age == 8, 5, 
                                       ifelse(age == 9, 6, NA)))))))
table(flsurvey_dta$age)


# Education: 1 = high school or less, 2 = some college, 3 = college degree, 4 = graduate degree
flsurvey_dta$education <- as.numeric(flsurvey_dta$EDU)
flsurvey_dta$education <- with(flsurvey_dta, ifelse(education == 7 | education == 6, 1,
                                             ifelse(education == 8, 2,
                                             ifelse(education == 4| education == 9, 3,
                                             ifelse(education == 5, 4, NA)))))
table(flsurvey_dta$education)


# Race voter file: 1 = white, 2 = black, 3 = hispanic, 4 = other 
flsurvey_dta$race_voterfile <- with(flsurvey_dta, ifelse(Race == "5", 1, 
                                                  ifelse(Race == "3", 2,
                                                  ifelse(Race == "4", 3, 4))))
table(flsurvey_dta$race_voterfile)


# Race: 1 = white, 0 = other
flsurvey_dta$white <- ifelse(flsurvey_dta$race_voterfile == 1, 1, 0)
table(flsurvey_dta$white)


# Contract Covid19: 1 = not concerned, 2 = neither nor, 3 = concerned
flsurvey_dta$contract_covid19_3 <- with(flsurvey_dta, ifelse(INFECT_1 == "Not Concerned" |
                                                             INFECT_1 == "Not Very Concerned", 1, 
                                                      ifelse(INFECT_1 == "Neither Concerned nor Not Concerned", 2, 
                                                      ifelse(INFECT_1 == "Somewhat Concerned" | 
                                                             INFECT_1 == "Very Concerned", 3, NA))))
table(flsurvey_dta$contract_covid19_3)


# Ideology: 1 Very liberal ---> 7 Very conservative
flsurvey_dta$ideo7 <- with(flsurvey_dta, ifelse(IDEO == "Very conservative ", 7, 
                                         ifelse(IDEO == "Conservative ", 6, 
                                         ifelse(IDEO == "Slightly conservative ", 5, 
                                         ifelse(IDEO == "Moderate, middle of the road ", 4, 
                                         ifelse(IDEO == "Slightly liberal ", 3, 
                                         ifelse(IDEO == "Liberal ", 2, 
                                         ifelse(IDEO == "Very liberal ", 1, NA))))))))

table(flsurvey_dta$ideo7, useNA = "ifany")


# Party registration: 1 = NPA, 2 = Dem, 3 = Rep, 4 = Other 
flsurvey_dta$pid <- as.character(flsurvey_dta$PartyAffiliation)
flsurvey_dta$pid <- with(flsurvey_dta, ifelse(pid == "NPA", 1, 
                                       ifelse(pid == "DEM", 2, 
                                       ifelse(pid == "REP", 3, 4))))
table(flsurvey_dta$pid)


# Relevel party registration and make NPA base category
flsurvey_dta$pid1 <- factor(flsurvey_dta$pid)
flsurvey_dta$pid1 <- relevel(flsurvey_dta$pid1, ref = "1")
flsurvey_dta$pid1[flsurvey_dta$pid1 == 4] <- NA
table(flsurvey_dta$pid1)


# Drop NPAs from primary elections  
flsurvey_dta$pid2 <- flsurvey_dta$pid1
flsurvey_dta$pid2[flsurvey_dta$pid2 == 1] <- NA
table(flsurvey_dta$pid2)


# Make D the baseline category in pid2
flsurvey_dta$pid2 <- relevel(flsurvey_dta$pid2, ref = "2")


#==========# 
# Table 1  #
#==========# 

# Subset those who have voted VBM in 2016 and 2018, and in the 2020 General Election
flsurvey_dta <- data.table(flsurvey_dta)
vbm_voter1618 <- flsurvey_dta[flsurvey_dta$gen18 == 1 & flsurvey_dta$gen16 == 1]
vbm_voter20 <-  flsurvey_dta[flsurvey_dta$gen20 == 1]


# Vote method misreporting: 1 = misreporting, 0 = correct 
vbm_voter1618$vmethod_misreport <- ifelse(vbm_voter1618$vote_method == 1, 0, 1)
table(vbm_voter1618$vmethod_misreport)


# Vote method misreporting: 1 = misreporting, 0 = correct 
vbm_voter20$vmethod_misreport <- ifelse(vbm_voter20$vote20_method == 1, 0, 1)
table(vbm_voter20$vmethod_misreport)


# Table 1 output
crosstab(vbm_voter1618$vmethod_misreport, vbm_voter1618$pid1, prop.c = T)
crosstab(vbm_voter20$vmethod_misreport, vbm_voter20$pid1, prop.c = T)


#=====================================================#
# Table 2: Full Sample Model Estimation.              #
# DV: 1 = Don't usually vote by mail; 0 = otherwise   #
# Gen16 and Gen18                                     #
#=====================================================#


# Baseline model for Gen16 and Gen18
m1_baseline <- glm(past_vbmgen1618_miresport ~ vote20_trump + contract_covid19_3 + political_awareness + 
                   pid1 + ideo7 + age + female + white + education, data = flsurvey_dta, family = "binomial")
summary(m1_baseline)

# Huber-White robust standard errors, clustered by county
coeftest(m1_baseline, vcov=vcovHC(m1_baseline, type="HC0", cluster="county"))


# Model interacting Trump and political awareness
m1_trumpnews_int <- glm(past_vbmgen1618_miresport ~ vote20_trump + contract_covid19_3 + political_awareness + 
                        pid1 + ideo7 + age + female + white + education + political_awareness:vote20_trump, 
                        data = flsurvey_dta, family = "binomial")
summary(m1_trumpnews_int)

# Huber-White robust standard errors, clustered by county
coeftest(m1_trumpnews_int, vcov=vcovHC(m1_trumpnews_int, type="HC0", cluster="county"))


# Baseline model for Gen16 and Gen18, and Pri16 and Pri18
m1_baseline_genpri <- glm(past_vbmgenpri1618_miresport ~ vote20_trump + contract_covid19_3 + political_awareness + 
                          pid2 + ideo7 + age + female + white + education, data = flsurvey_dta, family = "binomial")
summary(m1_baseline_genpri)

# Huber-White robust standard errors, clustered by county
coeftest(m1_baseline_genpri, vcov=vcovHC(m1_baseline_genpri, type="HC0", cluster="county"))


# Model interacting Trump and political awareness
m1_trumpnews_int_genpri <- glm(past_vbmgenpri1618_miresport ~ vote20_trump + contract_covid19_3 + political_awareness + 
                               pid2 + ideo7 + age + female + white + education + political_awareness:vote20_trump, 
                               data = flsurvey_dta, family = "binomial")
summary(m1_trumpnews_int_genpri)

# Huber-White robust standard errors, clustered by county
coeftest(m1_trumpnews_int_genpri, vcov=vcovHC(m1_trumpnews_int_genpri, type="HC0", cluster="county"))


#==============#
# Model Output #
#==============#

# Model Output
texreg(list(m1_baseline, m1_trumpnews_int, m1_baseline_genpri, m1_trumpnews_int_genpri),
       caption="Full Sample, Misreporting Past VBM Usage",
       digits = 3,
       dcolumn=FALSE,
       model.names=c("m1_baseline", "m1_trumpnews_int", "m1_baseline_genpri", "m1_trumpnews_int_genpri"),
       override.se=list(summary(m1_baseline)$coef[,2],
                        summary(m1_trumpnews_int)$coef[,2],
                        summary(m1_baseline_genpri)$coef[,2],
                        summary(m1_trumpnews_int_genpri)$coef[,2],
                        override.pval=list(summary(m1_baseline)$coef[,4],
                                           summary(m1_trumpnews_int)$coef[,4],
                                           summary(m1_baseline_genpri)$coef[,4],
                                           summary(m1_trumpnews_int_genpri)$coef[,4])))


#===================================================#
# Table 3: Full Sample Model Estimate.              #
# DV: 1 = VBM 20 Misreport validated; 0 = otherwise #
#===================================================#

# Baseline model.
m2_baseline <- glm(votevbm20_misreport ~ vote20_trump + contract_covid19_3 + past_vbmgen1618_miresport + political_awareness + 
                   pid1 + ideo7 + age + female + white + education, data = flsurvey_dta, family = "binomial")
summary(m2_baseline)

# Huber-White robust standard errors, clustered by county
coeftest(m2_baseline, vcov=vcovHC(m2_baseline, type="HC0", cluster="county"))


# Model interacting Trump and political awareness
m2_trumpnews_int <- glm(votevbm20_misreport ~ vote20_trump + contract_covid19_3 + past_vbmgen1618_miresport + political_awareness + 
                        pid1 + ideo7 + age + female + white + education + political_awareness:vote20_trump, 
                        data = flsurvey_dta, family = "binomial")
summary(m2_trumpnews_int)

# Huber-White robust standard errors, clustered by county
coeftest(m2_trumpnews_int, vcov=vcovHC(m2_trumpnews_int, type="HC0", cluster="county"))


# Baseline model.
m2_baseline_genpri <- glm(votevbm20_misreport ~ vote20_trump + contract_covid19_3 + past_vbmgenpri1618_miresport + 
                          political_awareness + pid2 + ideo7 + age + female + white + education, 
                          data = flsurvey_dta, family = "binomial")
summary(m2_baseline_genpri)

# Huber-White robust standard errors, clustered by county
coeftest(m2_baseline_genpri, vcov=vcovHC(m2_baseline_genpri, type="HC0", cluster="county"))


# Model interacting Trump and political awareness
m2_trumpnews_int_genpri <- glm(votevbm20_misreport ~ vote20_trump + contract_covid19_3 + past_vbmgenpri1618_miresport + 
                               political_awareness + pid2 + ideo7 + age + female + white + education + 
                               political_awareness:vote20_trump, data = flsurvey_dta, family = "binomial")
summary(m2_trumpnews_int_genpri)

# Huber-White robust standard errors, clustered by county
coeftest(m2_trumpnews_int_genpri, vcov=vcovHC(m2_trumpnews_int_genpri, type="HC0", cluster="county"))


#==============#
# Model Output #
#==============#

# Model Output
texreg(list(m2_baseline, m2_trumpnews_int, m2_baseline_genpri, m2_trumpnews_int_genpri),
       caption="Full Sample, Misreporting Nov20 VBM vote",
       digits = 3,
       dcolumn=FALSE,
       model.names=c("m2_baseline",  "m2_trumpnews_int"),
       override.se=list(summary(m2_baseline)$coef[,2],
                        summary(m2_trumpnews_int)$coef[,2],
                        summary(m2_baseline_genpri)$coef[,2],
                        summary(m2_trumpnews_int_genpri)$coef[,2],
                        override.pval=list(summary(m2_baseline)$coef[,4],
                                           summary(m2_trumpnews_int)$coef[,4],
                                           summary(m2_baseline_genpri)$coef[,4],
                                           summary(m2_trumpnews_int_genpri)$coef[,4])))


#####################################
#===================================#
# Placebo Test for In-Person Voting #
#===================================#
#####################################

#=====================================================#
# Table 4: Full Sample Model Estimate.                #
# DV: 1 = Don't usually vote in-person; 0 = otherwise #
# Gen 16 and Gen 18                                   #
#=====================================================#

# Baseline model for retrospective in-person voting misreport
m3_baseline <- glm(past_inpersonmisreportgen1618 ~ vote20_trump + contract_covid19_3 + political_awareness + 
                   pid1 + ideo7 + age + female + white + education, data = flsurvey_dta, family = "binomial")
summary(m3_baseline)

# Huber-White robust standard errors, clustered by county
coeftest(m3_baseline, vcov=vcovHC(m3_baseline, type="HC0", cluster="county"))


# Model interacting Trump and political awareness
m3_trumpnews_int <- glm(past_inpersonmisreportgen1618 ~ vote20_trump + contract_covid19_3 + political_awareness + 
                        pid1 + ideo7 + age + female + white + education + political_awareness:vote20_trump, 
                        data = flsurvey_dta, family = "binomial")
summary(m3_trumpnews_int)

# Huber-White robust standard errors, clustered by county
coeftest(m3_trumpnews_int, vcov=vcovHC(m3_trumpnews_int, type="HC0", cluster="county"))


#===================================================#
# Full Sample Model Estimate.                       #
# DV: 1 = Won't vote in-person Nov20; 0 = otherwise #
# Gen 2020                                          #
#===================================================#

# Baseline model. For prospective in-person voting misreport
m4_baseline_nov20 <- glm(inpersonmisreport_nov20 ~ vote20_trump + contract_covid19_3 + political_awareness + 
                         pid1 + ideo7 + age + female + white + education + past_inpersonmisreportgen1618, 
                         data = flsurvey_dta, family = "binomial")
summary(m4_baseline_nov20)

# Huber-White robust standard errors, clustered by county
coeftest(m4_baseline_nov20, vcov=vcovHC(m4_baseline_nov20, type="HC0", cluster="county"))


# Model interacting Trump and political awareness
m4_trumpnews_int_nov20 <- glm(inpersonmisreport_nov20 ~ vote20_trump + contract_covid19_3 + political_awareness + 
                              pid1 + ideo7 + age + female + white + education + past_inpersonmisreportgen1618 + 
                              political_awareness:vote20_trump, data = flsurvey_dta, family = "binomial")
summary(m4_trumpnews_int_nov20)

# Huber-White robust standard errors, clustered by county
coeftest(m4_trumpnews_int_nov20, vcov=vcovHC(m4_trumpnews_int_nov20, type="HC0", cluster="county"))


#==============#
# Model Output #
#==============#

# Model Output
texreg(list(m3_baseline, m3_trumpnews_int, m4_baseline_nov20, m4_trumpnews_int_nov20),
       caption="Full Sample, Reporting to not usually vote in-person",
       digits = 3,
       dcolumn=FALSE,
       model.names=c("m3_baseline", "m3_trumpvbm_int", "m4_baseline_nov20", "m4_trumpnews_int_nov20"),
       override.se=list(summary(m3_baseline)$coef[,2],
                        summary(m3_trumpnews_int)$coef[,2],
                        summary(m4_baseline_nov20)$coef[,2],
                        summary(m4_trumpnews_int_nov20)$coef[,2],
                        override.pval=list(summary(m3_baseline)$coef[,4],
                                           summary(m3_trumpnews_int)$coef[,4],
                                           summary(m4_baseline_nov20)$coef[,4],
                                           summary(m4_trumpnews_int_nov20)$coef[,4])))


#=============================================================================#
# Figure 2: Marginal Effects for Retrospective Vote Method Misreport, Model 2 #
#=============================================================================#

m1_trumpnews_int <- glm(past_vbmgen1618_miresport ~ vote20_trump + contract_covid19_3 + political_awareness + 
                        pid1 + ideo7 + age + female + white + education + political_awareness:vote20_trump, 
                        data = flsurvey_dta, family = "binomial")
summary(m1_trumpnews_int)

flsurvey_dta<- data.frame(flsurvey_dta)


# Get predicted probabilities 
vote_notrump <- ggpredict(m1_trumpnews_int, terms = c("vote20_trump", "political_awareness"), 
                          typical = "mean", ci.lvl = 0.95, vcov.fun = "vcovCL")
vote_notrump <- data.table(vote_notrump)


# Plot 1
# Plot the graph: Non- Trump support and COVID-19 by PID
vote_notrump <- data.frame(vote_notrump)

# The errorbars overlapped, so use position_dodge to move them horizontally
pd <- position_dodge(0.5) # move them .05 to the left and right


plot_margin1 <- ggplot(vote_notrump, aes(x = x, y = predicted, 
                                         colour = group)) + 
  
  # add the coefficients
  geom_point(aes(shape = group), position = pd, size=4) + 
  
  # remove the automatically generated legend
  guides(shape="none") + 
  
  # add a ribbon with the confidence band 
  geom_errorbar(
    aes(
      # lower and upper bound of the ribbon
      ymin = conf.low, ymax = conf.high,
      colour = group
    ), position = pd, width = .01, size = 1) + 
  
  # break y-axis every 0.25 and show it in percentages
  scale_y_continuous(breaks=seq(0, 0.20, by = 0.025), labels = scales::percent, limits=c(0, 0.20), expand = c(0, 0)) + 
  xlab("") +
  ylab("Probability of VBM Retrospective Misreport") + 
  scale_x_continuous(breaks=c(0, 1), 
                     labels = c("0" = "Non Trump Supporter", "1" = "Trump Supporter"), expand = c(0, 0)) +
  
  # add the shapes in order: square, circle, and triangle 
  scale_shape_manual(values = c(15, 16, 17, 18, 8)) + 
  
  # set the shapes of points in legend 
  guides(colour = guide_legend(override.aes = list(shape = c(15, 16, 17, 18, 8)))) +
  
  # remove background, set legend position bottom
  theme_bw() + theme(text = element_text(size = 15), #panel.grid.major = element_blank(),
                     panel.grid.minor = element_blank(), legend.position = "bottom", 
                     axis.title = element_text(size = 10),
                     axis.text.y = element_text(size = 10),
                     axis.text.x = element_text(size = 12),
                     legend.text = element_text(size = 10),
                     legend.title=element_text(size=10)) +
  # legend title
  scale_color_discrete(name="Political Awareness")

  plot_margin1 + scale_colour_manual(name="Political Awareness", 
                                   values = c("1" = "#000000", "2" = "#000000", 
                                              "3" = "#000000", "4" = "#000000", "5" = "#000000"))

# save graph on overleaf
ggsave(filename = "/Users/Enrijeta/Dropbox/Apps/Overleaf/POQ_VBM_Misremember/plots/trump_news.pdf", width = 6, height = 6)


#===========================================================================#
# Figure 3: Marginal Effects for Prospective Vote Method Misreport, Model 2 #
#===========================================================================#

# Model interacting Trump and political awareness
m2_trumpnews_int <- glm(votevbm20_misreport ~ vote20_trump + contract_covid19_3 + past_vbmgen1618_miresport + political_awareness + 
                        pid1 + ideo7 + age + female + white + education + political_awareness:vote20_trump, 
                        data = flsurvey_dta, family = "binomial")
summary(m2_trumpnews_int)


# Get predicted probabilities 
vote_trump <- ggpredict(m2_trumpnews_int, terms = c("vote20_trump", "political_awareness"), 
                        typical = "mean", ci.lvl = 0.95, vcov.fun = "vcovCL")
vote_trump <- data.table(vote_trump)


# Plot 1
# Plot the graph: Non- Trump support and COVID-19 by PID
vote_trump <- data.frame(vote_trump)

# The errorbars overlapped, so use position_dodge to move them horizontally
pd <- position_dodge(0.5) # move them .05 to the left and right


plot_margin2 <- ggplot(vote_trump, aes(x = x, y = predicted, 
                                       colour = group)) + 
  
  # add the coefficients
  geom_point(aes(shape = group), position = pd, size=4) + 
  
  # remove the automatically generated legend
  guides(shape="none") + 
  
  # add a ribbon with the confidence band 
  geom_errorbar(
    aes(
      # lower and upper bound of the ribbon
      ymin = conf.low, ymax = conf.high,
      colour = group
    ), position = pd, width = .01, size = 1) + 
  
  # break y-axis every 0.25 and show it in percentages
  scale_y_continuous(breaks=seq(0, 0.25, by = 0.025), labels = scales::percent, limits=c(0, 0.25), expand = c(0, 0)) + 
  xlab("") +
  ylab("Probability of in-Person Voting Prospective Misreport") + 
  scale_x_continuous(breaks=c(0, 1), 
                     labels = c("0" = "Non Trump Supporter", "1" = "Trump Supporter"), expand = c(0, 0)) +
  
  # add the shapes in order: square, circle, and triangle 
  scale_shape_manual(values = c(15, 16, 17, 18, 8)) + 
  
  # set the shapes of points in legend 
  guides(colour = guide_legend(override.aes = list(shape = c(15, 16, 17, 18, 8)))) +
  
  # remove background, set legend position bottom
  theme_bw() + theme(text = element_text(size = 15), #panel.grid.major = element_blank(),
                     panel.grid.minor = element_blank(), legend.position = "bottom", 
                     axis.title = element_text(size = 10),
                     axis.text.y = element_text(size = 10),
                     axis.text.x = element_text(size = 12),
                     legend.text = element_text(size = 10),
                     legend.title=element_text(size=10)) +
  # legend title
  scale_color_discrete(name="Political Awareness")

plot_margin2 +  scale_colour_manual(name="Political Awareness", 
                                    values = c("1" = "#000000", "2" = "#000000", 
                                               "3" = "#000000", "4" = "#000000", "5" = "#000000"))

# save graph in overleaf
ggsave(filename = "/Users/Enrijeta/Dropbox/Apps/Overleaf/POQ_VBM_Misremember/plots/trump_news_nov20.pdf", width = 6, height = 6)



#############################################
#===========================================#
# APPENDIX TABLES WITH COUNTY-FIXED EFFECTS # 
#===========================================#
#############################################

#=====================================================#
# Full Sample Model Estimate.                         #
# DV: 1 = Past VBM Misreport validated; 0 = otherwise #
# With County Fixed-Effects                           #
#=====================================================#

# Baseline model.
m1_baseline_c <- glm(past_vbmgen1618_miresport ~ vote20_trump + contract_covid19_3 + political_awareness + 
                     pid1 + ideo7 + age + female + white + education + factor(county), 
                     data = flsurvey_dta, family = "binomial")
summary(m1_baseline_c)

# Huber-White robust standard errors, clustered by county
coeftest(m1_baseline_c, vcov=vcovHC(m1_baseline_c, type="HC0", cluster="county"))


# Model interacting Trump and political awareness
m1_trumpnews_int_c <- glm(past_vbmgen1618_miresport ~ vote20_trump + contract_covid19_3 + political_awareness + 
                          pid1 + ideo7 + age + female + white + education + political_awareness:vote20_trump + factor(county), 
                          data = flsurvey_dta, family = "binomial")
summary(m1_trumpnews_int_c)

# Huber-White robust standard errors, clustered by county
coeftest(m1_trumpnews_int_c, vcov=vcovHC(m1_trumpnews_int_c, type="HC0", cluster="county"))


# Baseline model.
m1_baseline_genpri_c <- glm(past_vbmgenpri1618_miresport ~ vote20_trump + contract_covid19_3 + political_awareness + 
                            pid2 + ideo7 + age + female + white + education + factor(county), 
                            data = flsurvey_dta, family = "binomial")
summary(m1_baseline_genpri_c)

# Huber-White robust standard errors, clustered by county
coeftest(m1_baseline_genpri_c, vcov=vcovHC(m1_baseline_genpri_c, type="HC0", cluster="county"))


# Model interacting Trump and political awareness
m1_trumpnews_int_genpri_c <- glm(past_vbmgenpri1618_miresport ~ vote20_trump + contract_covid19_3 + political_awareness + 
                                pid2 + ideo7 + age + female + white + education + political_awareness:vote20_trump +  
                                factor(county), data = flsurvey_dta, family = "binomial")
summary(m1_trumpnews_int_genpri_c)

# Huber-White robust standard errors, clustered by county
coeftest(m1_trumpnews_int_genpri_c, vcov=vcovHC(m1_trumpnews_int_genpri_c, type="HC0", cluster="county"))


#========================================#
# Model Output with County Fixed-Effects #
#========================================#

# Model Output
texreg(list(m1_baseline_c, m1_trumpnews_int_c, m1_baseline_genpri_c, m1_trumpnews_int_genpri_c),
       caption="Full Sample, Misreporting Past VBM Usage with County-Fixed-Effects",
       digits = 3,
       dcolumn=FALSE,
       model.names=c("m1_baseline_c", "m1_trumpnews_int_c", "m1_baseline_genpri_c", "m1_trumpnews_int_genpri_c"),
       override.se=list(summary(m1_baseline_c)$coef[,2],
                        summary(m1_trumpnews_int_c)$coef[,2],
                        summary(m1_baseline_genpri_c)$coef[,2],
                        summary(m1_trumpnews_int_genpri_c)$coef[,2],
                        override.pval=list(summary(m1_baseline_c)$coef[,4],
                                           summary(m1_trumpnews_int_c)$coef[,4],
                                           summary(m1_baseline_genpri_c)$coef[,4],
                                           summary(m1_trumpnews_int_genpri_c)$coef[,4])))


#===================================================#
# Full Sample Model Estimate.                       #
# DV: 1 = VBM 20 Misreport validated; 0 = otherwise #
# with County Fixed-Effects                         #
#===================================================#

# Baseline model.
m2_baseline_c <- glm(votevbm20_misreport ~ vote20_trump + contract_covid19_3 + past_vbmgen1618_miresport + 
                     political_awareness + pid1 + ideo7 + age + female + white + education + 
                     factor(county), data = flsurvey_dta, family = "binomial")
summary(m2_baseline_c)

# Huber-White robust standard errors, clustered by county
coeftest(m2_baseline_c, vcov=vcovHC(m2_baseline_c, type="HC0", cluster="county"))


# Model interacting Trump and political awareness
m2_trumpnews_int_c <- glm(votevbm20_misreport ~ vote20_trump + contract_covid19_3 + past_vbmgen1618_miresport + 
                          political_awareness + pid1 + ideo7 + age + female + white + education + 
                          political_awareness:vote20_trump + factor(county), data = flsurvey_dta, family = "binomial")
summary(m2_trumpnews_int_c)

# Huber-White robust standard errors, clustered by county
coeftest(m2_trumpnews_int_c, vcov=vcovHC(m2_trumpnews_int_c, type="HC0", cluster="county"))


# Baseline model.
m2_baseline_genpri_c <- glm(votevbm20_misreport ~ vote20_trump + contract_covid19_3 + past_vbmgenpri1618_miresport + 
                            political_awareness + pid2 + ideo7 + age + female + white + education + 
                            factor(county), data = flsurvey_dta, family = "binomial")
summary(m2_baseline_genpri_c)

# Huber-White robust standard errors, clustered by county
coeftest(m2_baseline_genpri_c, vcov=vcovHC(m2_baseline_genpri_c, type="HC0", cluster="county"))


# Model interacting Trump and political awareness
m2_trumpnews_int_genpri_c <- glm(votevbm20_misreport ~ vote20_trump + contract_covid19_3 + past_vbmgenpri1618_miresport + 
                                 political_awareness + pid2 + ideo7 + age + female + white + education + 
                                 political_awareness:vote20_trump + factor(county), data = flsurvey_dta, family = "binomial")
summary(m2_trumpnews_int_genpri_c)

# Huber-White robust standard errors, clustered by county
coeftest(m2_trumpnews_int_genpri_c, vcov=vcovHC(m2_trumpnews_int_genpri_c, type="HC0", cluster="county"))


#========================================#
# Model Output with County Fixed-Effects #
#========================================#

# Model Output
texreg(list(m2_baseline_c, m2_trumpnews_int_c, m2_baseline_genpri_c, m2_trumpnews_int_genpri_c),
       caption="Full Sample, Misreporting Nov20 VBM vote",
       digits = 3,
       dcolumn=FALSE,
       model.names=c("m2_baseline",  "m2_trumpnews_int"),
       override.se=list(summary(m2_baseline_c)$coef[,2],
                        summary(m2_trumpnews_int_c)$coef[,2],
                        summary(m2_baseline_genpri_c)$coef[,2],
                        summary(m2_trumpnews_int_genpri_c)$coef[,2],
                        override.pval=list(summary(m2_baseline_c)$coef[,4],
                                           summary(m2_trumpnews_int_c)$coef[,4],
                                           summary(m2_baseline_genpri_c)$coef[,4],
                                           summary(m2_trumpnews_int_genpri_c)$coef[,4])))


#++++++++++++++++++++++++++++++++++++++#
#======================================#
# Appendix: Model Replication for VBM  #
#======================================#
#++++++++++++++++++++++++++++++++++++++#

# VBM 2020 vote validated, using self-reported vote method and the voter file item: 1 = VBM misreport, 0 = other 
flsurvey_dta$votevbm20_misreport_gen1618 <- ifelse((flsurvey_dta$vote20_method != 1 & flsurvey_dta$vote_vbm20 == 1) & 
                                                    flsurvey_dta$habitual_vbm_gen1618==1 , 1, 0)
table(flsurvey_dta$votevbm20_misreport_gen1618)


# VBM 2020 vote validated, using self-reported vote method and the voter file item: 1 = VBM misreport, 0 = other 
flsurvey_dta$votevbm20_misreport_genpri1618 <- ifelse((flsurvey_dta$vote20_method != 1 & flsurvey_dta$vote_vbm20 == 1) &
                                                       flsurvey_dta$habitual_vbm_genpri1618==1 , 1, 0)
table(flsurvey_dta$votevbm20_misreport_genpri1618)


#===============================================================================#
# Full Sample Model Estimate. DV: 1 = VBM 20 Misreport validated; 0 = otherwise #
#===============================================================================#

# Baseline model. 
m2_baseline_gen1618 <- glm(votevbm20_misreport_gen1618 ~ vote20_trump + contract_covid19_3 + political_awareness + 
                           pid1 + ideo7 + age + female + white + education, data = flsurvey_dta, family = "binomial")
summary(m2_baseline_gen1618)

# Huber-White robust standard errors, clustered by county
coeftest(m2_baseline_gen1618, vcov=vcovHC(m2_baseline_gen1618, type="HC0", cluster="county"))


# Model interacting Trump and political awareness
m2_trumpnews_int_gen1618 <- glm(votevbm20_misreport_gen1618 ~ vote20_trump + contract_covid19_3 + political_awareness + 
                                pid1 + ideo7 + age + female + white + education + political_awareness:vote20_trump, 
                                data = flsurvey_dta, family = "binomial")
summary(m2_trumpnews_int_gen1618)

# Huber-White robust standard errors, clustered by county
coeftest(m2_trumpnews_int_gen1618, vcov=vcovHC(m2_trumpnews_int_gen1618, type="HC0", cluster="county"))


# Baseline model. 
m2_baseline_genpri1618 <- glm(votevbm20_misreport_genpri1618 ~ vote20_trump + contract_covid19_3  + political_awareness + 
                              pid2 + ideo7 + age + female + white + education, data = flsurvey_dta, family = "binomial")
summary(m2_baseline_genpri1618)

# Huber-White robust standard errors, clustered by county
coeftest(m2_baseline_genpri1618, vcov=vcovHC(m2_baseline_genpri1618, type="HC0", cluster="county"))


# Model interacting Trump and political awareness
m2_trumpnews_int_genpri1618 <- glm(votevbm20_misreport_genpri1618 ~ vote20_trump + contract_covid19_3 + political_awareness + 
                                   pid2 + ideo7 + age + female + white + education + political_awareness:vote20_trump, 
                                   data = flsurvey_dta, family = "binomial")
summary(m2_trumpnews_int_genpri1618)

# Huber-White robust standard errors, clustered by county
coeftest(m2_trumpnews_int_genpri1618, vcov=vcovHC(m2_trumpnews_int_genpri1618, type="HC0", cluster="county"))


#==============#
# Model Output #
#==============#

# Model Output
texreg(list(m2_baseline_gen1618, m2_trumpnews_int_gen1618, m2_baseline_genpri1618, m2_trumpnews_int_genpri1618),
       caption="Full Sample, Misreporting Nov20 VBM vote",
       digits = 3,
       dcolumn=FALSE,
       model.names=c("m2_baseline_gen1618",  "m2_trumpnews_int_gen1618"),
       override.se=list(summary(m2_baseline_gen1618)$coef[,2],
                        summary(m2_trumpnews_int_gen1618)$coef[,2],
                        summary(m2_baseline_genpri1618)$coef[,2],
                        summary(m2_trumpnews_int_genpri1618)$coef[,2],
                        override.pval=list(summary(m2_baseline_gen1618)$coef[,4],
                                           summary(m2_trumpnews_int_gen1618)$coef[,4],
                                           summary(m2_baseline_genpri1618)$coef[,4],
                                           summary(m2_trumpnews_int_genpri1618)$coef[,4])))
